This data setup stolen brazenly from Jennifer Thompson.
## Load libraries we'll use
# library(devtools)
# install_github('datadotworld/data.world-r')
library(data.world) ## for querying directly from data.world
library(tidyverse) ## for data wrangling and piping
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ------------------------------------------------------------------------------------
filter(): dplyr, stats
lag(): dplyr, stats
query(): dplyr, data.world
library(rms) ## rms has nice functionality for getting predicted values from model objects
Loading required package: Hmisc
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Attaching package: 㤼㸱Hmisc㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
combine, src, summarize
The following objects are masked from 㤼㸱package:base㤼㸲:
format.pval, round.POSIXt, trunc.POSIXt, units
Loading required package: SparseM
Attaching package: 㤼㸱SparseM㤼㸲
The following object is masked from 㤼㸱package:base㤼㸲:
backsolve
library(scales)
Attaching package: 㤼㸱scales㤼㸲
The following object is masked from 㤼㸱package:purrr㤼㸲:
discard
The following objects are masked from 㤼㸱package:readr㤼㸲:
col_factor, col_numeric
library(reshape2)
Attaching package: 㤼㸱reshape2㤼㸲
The following object is masked from 㤼㸱package:tidyr㤼㸲:
smiths
We want to determine what county characteristics may be predictors of both the county’s final winner in the 2016 presidential election, and the margin of victory by which that candidate won.
We’ll load the county characteristics and 2016 presidential election results datasets directly from data.world.
These data are mostly from the 2015 American Community Survey, with additional data from other sources. A full data dictionary can be found here.
library(data.world)
## Set connection (see package README for details: https://github.com/datadotworld/data.world-r)
conn<-data.world(read_csv('C:/Users/rkahne/Documents/data_world_api.csv')$key)
# ## What data tables are available? (both dplyr and data.world have a query(); must specify)
# data_list <- data.world::query(conn,
# dataset = 'data4democracy/election-transparency',
# query = "SELECT * FROM Tables")
# data_list
countyChar <- data.world::query(conn,
dataset = 'data4democracy/election-transparency',
query = "SELECT * FROM CountyCharacteristics")
We also want party registration data from November 2016, queried from the full PartyRegistration file. This file includes data pulled from each state’s Secretary of State web site. Full data dictionary is here.
Some of the variable names overlap with names in the next dataset; we’ll drop variables that are redundant (state/county names/abbreviations and year/month of registration) and add “Reg” to everything else except state/county keys to clarify that it’s registration info.
voterReg2016 <-
data.world::query(conn,
dataset = 'data4democracy/election-transparency',
query = "SELECT * FROM PartyRegistration WHERE Year = 2016 AND Month = 11")
voterReg2016 <- voterReg2016 %>%
select(-one_of("CountyName", "StateName", "StateAbbr", "Year", "Month", "YearMonth"))
names(voterReg2016) <- ifelse(names(voterReg2016) %in% c('State', 'County'), names(voterReg2016),
paste0(names(voterReg2016), 'Reg'))
These data are collected from a Harvard research project. A full data dictionary can be found here.
presResults2016 <- data.world::query(conn,
dataset = 'data4democracy/election-transparency',
query = "SELECT * FROM PresidentialElectionResults2016")
Let’s join the datasets, calculate some proportions, and look at some basic descriptive statistics.
## Check what variables are in common
# intersect(names(countyChar), names(voterReg2016))
# intersect(names(countyChar), names(presResults2016))
data2016 <- reduce(list(countyChar, voterReg2016, presResults2016),
left_join,
by = c('County', 'State'))
## Function to quickly calculate a proportion out of TotalPopulation - we'll need to do this a lot
prop_total <- function(x){ x / data2016$TotalPopulation }
data2016 <- data2016 %>%
## Calculate lots of proportion variables
mutate(propMale = prop_total(Male),
propKids = prop_total(Age0_4 + Age5_9 + Age10_14 + Age15_19),
propAdultsNoTeens = 1 - propKids,
## 15-19 is included in labor force, marital status questions
totalAdultsWithTeens = Age15_19 + Age20_24 + Age25_34 + Age35_44 + Age45_54 + Age55_59 +
Age60_64 + Age65_74 + Age75_84 + Age85,
propAdultsWithTeens = prop_total(totalAdultsWithTeens),
## Only >18 included in education questions
totalAdultsNoTeens = Age20_24 + Age25_34 + Age35_44 + Age45_54 + Age55_59 + Age60_64 +
Age65_74 + Age75_84 + Age85,
propElders = prop_total(Age65_74 + Age75_84 + Age85),
propNMarried = NeverMarried / totalAdultsWithTeens,
propHispanic = prop_total(Hispanic),
propWhite = prop_total(White),
propBlack = prop_total(Black),
majWhite = propWhite > 0.5,
majBlack = propBlack > 0.5,
propNoHS = (EdK8 + Ed9_12) / totalAdultsNoTeens,
propHS = EdHS / totalAdultsNoTeens,
propMoreHS = (EdCollNoDegree + EdAssocDegree + EdBachelorDegree + EdGraduateDegree) /
totalAdultsNoTeens,
propMfg2015 = MfgEmp2015 / LaborForce,
propUnemp = Unemployment / LaborForce,
propLaborForce = prop_total(LaborForce),
propStein = stein / totalvotes,
propJohnson = johnson / totalvotes,
propTrump = trump / totalvotes,
propClinton = clinton / totalvotes,
propVoters = totalvotes / totalAdultsNoTeens,
votedTrump = rPct > 0.5,
state_EV = electoral_votes[StateName])
## View full data frame
data2016
vote_by_state<-select(data2016, StateName, state_EV, clinton, trump, johnson, stein, other) %>%
melt(id= c('StateName','state_EV')) %>%
group_by(StateName, state_EV, variable) %>%
dplyr::summarize(total_vote = sum(value)) %>% # Dunno how plyr got in, but whatever.
complete(variable) %>%
ungroup() %>%
group_by(StateName) %>%
mutate(sum_total = sum(total_vote, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_vote),NA,paste0(round(total_vote / sum_total, 4)*100,'%')),
proportion_numeric = total_vote / sum_total) %>%
select(-sum_total)
candidate_order <- 'clinton' # CHANGE ME
level_order<-(filter(vote_by_state,variable == candidate_order) %>%
select(StateName, proportion_numeric) %>%
arrange(desc(proportion_numeric)))$StateName
vote_by_state$StateName<- factor(vote_by_state$StateName, levels = level_order)
vote_by_state$variable<-factor(vote_by_state$variable, levels = c('other','stein','johnson','trump','clinton'))
ggplot(vote_by_state, aes(x=StateName, y=total_vote, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('clinton' = 'blue', 'trump' = 'red', 'stein' = 'green','johnson' = 'yellow','other' = 'purple'),
name = 'Candidate',
breaks = c('other','stein','johnson','trump','clinton'),
labels = c('Others','Jill Stein','Gary Johnson','Donald J Trump','Hillary Clinton')) +
labs(x = NULL, y = 'Percent of Vote')+
scale_y_continuous(labels = scales::percent) +
ggtitle('Percent of Vote by State', subtitle='2016 USA Presidential Election') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Feel free to play with the variable in this chunk.
state<-'Tennessee'
vote_by_county<-filter(data2016, StateName == state) %>%
select(CountyName, clinton, trump, johnson, stein, other) %>%
melt(id= 'CountyName') %>%
group_by(CountyName, variable) %>%
dplyr::summarize(total_vote = sum(value)) %>% # Dunno how plyr got in, but whatever.
complete(variable) %>%
ungroup() %>%
group_by(CountyName) %>%
mutate(sum_total = sum(total_vote, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_vote),NA,paste0(round(total_vote / sum_total, 4)*100,'%')),
proportion_numeric = total_vote / sum_total) %>%
select(-sum_total)
candidate_order <- 'clinton' # CHANGE ME
level_order<-(filter(vote_by_county,variable == candidate_order) %>%
select(CountyName, proportion_numeric) %>%
arrange(desc(proportion_numeric)))$CountyName
vote_by_county$CountyName<- factor(vote_by_county$CountyName, levels = level_order)
vote_by_county$variable<-factor(vote_by_county$variable, levels = c('other','stein','johnson','trump','clinton'))
ggplot(vote_by_county, aes(x=CountyName, y=total_vote, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('clinton' = 'blue', 'trump' = 'red', 'stein' = 'green','johnson' = 'yellow','other' = 'purple'),
name = 'Candidate',
breaks = c('other','stein','johnson','trump','clinton'),
labels = c('Others','Jill Stein','Gary Johnson','Donald J Trump','Hillary Clinton')) +
labs(x = NULL, y = 'Percent of Vote')+
scale_y_continuous(labels = scales::percent) +
ggtitle(paste0('Percent of Vote by County - ',state), subtitle='2016 USA Presidential Election') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
library(ggtern)
ternary_vote<-mutate(data2016, third_party = johnson + stein + other) %>%
select(County, StateName, clinton, trump, third_party) %>%
melt(id= c('County','StateName')) %>%
group_by(County, StateName, variable) %>%
dplyr::summarize(total_vote = sum(value)) %>% # Dunno how plyr got in, but whatever.
complete(variable) %>%
ungroup() %>%
group_by(County) %>%
mutate(sum_total = sum(total_vote, na.rm=T)) %>%
mutate(proportion_numeric = total_vote / sum_total) %>%
select(-sum_total, -total_vote) %>%
spread(key = variable, value = proportion_numeric)
|========================= | 20% ~8 s remaining
|========================== | 21% ~8 s remaining
|=========================== | 21% ~8 s remaining
|=========================== | 22% ~8 s remaining
|============================ | 22% ~8 s remaining
|============================= | 23% ~8 s remaining
|============================== | 24% ~8 s remaining
|============================== | 24% ~8 s remaining
|=============================== | 25% ~8 s remaining
|================================ | 25% ~8 s remaining
|================================ | 26% ~8 s remaining
|================================= | 26% ~8 s remaining
|================================= | 26% ~8 s remaining
|================================== | 27% ~8 s remaining
|=================================== | 28% ~7 s remaining
|==================================== | 29% ~7 s remaining
|===================================== | 29% ~7 s remaining
|===================================== | 29% ~7 s remaining
|====================================== | 30% ~7 s remaining
|======================================= | 31% ~7 s remaining
|======================================== | 32% ~7 s remaining
|========================================= | 33% ~7 s remaining
|========================================== | 33% ~7 s remaining
|=========================================== | 34% ~7 s remaining
|============================================ | 35% ~7 s remaining
|============================================= | 35% ~6 s remaining
|============================================== | 36% ~6 s remaining
|=============================================== | 37% ~6 s remaining
|================================================ | 38% ~6 s remaining
|================================================ | 38% ~6 s remaining
|================================================= | 39% ~6 s remaining
|================================================== | 40% ~6 s remaining
|=================================================== | 40% ~6 s remaining
|=================================================== | 41% ~6 s remaining
|==================================================== | 41% ~6 s remaining
|===================================================== | 42% ~6 s remaining
|====================================================== | 42% ~6 s remaining
|======================================================= | 43% ~6 s remaining
|======================================================= | 44% ~6 s remaining
|======================================================== | 44% ~6 s remaining
|========================================================= | 45% ~6 s remaining
|========================================================= | 45% ~6 s remaining
|========================================================== | 45% ~6 s remaining
|========================================================== | 46% ~5 s remaining
|=========================================================== | 46% ~5 s remaining
|=========================================================== | 46% ~5 s remaining
|=========================================================== | 47% ~5 s remaining
|============================================================ | 47% ~5 s remaining
|============================================================= | 48% ~5 s remaining
|============================================================== | 49% ~5 s remaining
|============================================================== | 49% ~5 s remaining
|=============================================================== | 49% ~5 s remaining
|=============================================================== | 50% ~5 s remaining
|================================================================ | 50% ~5 s remaining
|================================================================ | 50% ~5 s remaining
|================================================================= | 51% ~5 s remaining
|================================================================== | 52% ~5 s remaining
|================================================================== | 52% ~5 s remaining
|=================================================================== | 53% ~5 s remaining
|==================================================================== | 53% ~5 s remaining
|==================================================================== | 54% ~5 s remaining
|==================================================================== | 54% ~5 s remaining
|===================================================================== | 54% ~5 s remaining
|===================================================================== | 55% ~5 s remaining
|====================================================================== | 55% ~5 s remaining
|======================================================================= | 56% ~5 s remaining
|======================================================================= | 56% ~5 s remaining
|======================================================================== | 57% ~5 s remaining
|========================================================================= | 57% ~5 s remaining
|========================================================================= | 58% ~4 s remaining
|========================================================================== | 58% ~4 s remaining
|=========================================================================== | 59% ~4 s remaining
|=========================================================================== | 59% ~4 s remaining
|============================================================================ | 60% ~4 s remaining
|============================================================================ | 60% ~4 s remaining
|============================================================================= | 61% ~4 s remaining
|============================================================================== | 61% ~4 s remaining
|============================================================================== | 61% ~4 s remaining
|=============================================================================== | 62% ~4 s remaining
|=============================================================================== | 62% ~4 s remaining
|================================================================================ | 63% ~4 s remaining
|================================================================================ | 63% ~4 s remaining
|================================================================================= | 63% ~4 s remaining
|================================================================================= | 64% ~4 s remaining
|================================================================================== | 64% ~4 s remaining
|================================================================================== | 65% ~4 s remaining
|=================================================================================== | 65% ~4 s remaining
|=================================================================================== | 65% ~4 s remaining
|==================================================================================== | 66% ~4 s remaining
|===================================================================================== | 66% ~4 s remaining
|===================================================================================== | 67% ~4 s remaining
|====================================================================================== | 68% ~4 s remaining
|======================================================================================= | 68% ~3 s remaining
|======================================================================================= | 68% ~3 s remaining
|======================================================================================== | 69% ~3 s remaining
|======================================================================================== | 69% ~3 s remaining
|========================================================================================= | 70% ~3 s remaining
|========================================================================================== | 70% ~3 s remaining
|========================================================================================== | 71% ~3 s remaining
|=========================================================================================== | 71% ~3 s remaining
|=========================================================================================== | 72% ~3 s remaining
|=========================================================================================== | 72% ~3 s remaining
|============================================================================================ | 72% ~3 s remaining
|============================================================================================ | 72% ~3 s remaining
|============================================================================================ | 72% ~3 s remaining
|============================================================================================= | 73% ~3 s remaining
|============================================================================================= | 73% ~3 s remaining
|============================================================================================== | 74% ~3 s remaining
|=============================================================================================== | 74% ~3 s remaining
|=============================================================================================== | 75% ~3 s remaining
|================================================================================================ | 75% ~3 s remaining
|================================================================================================ | 75% ~3 s remaining
|================================================================================================ | 76% ~3 s remaining
|================================================================================================= | 76% ~3 s remaining
|================================================================================================== | 77% ~3 s remaining
|================================================================================================== | 77% ~3 s remaining
|================================================================================================== | 77% ~3 s remaining
|=================================================================================================== | 78% ~3 s remaining
|==================================================================================================== | 78% ~3 s remaining
|==================================================================================================== | 79% ~2 s remaining
|==================================================================================================== | 79% ~2 s remaining
|===================================================================================================== | 79% ~2 s remaining
|====================================================================================================== | 80% ~2 s remaining
|====================================================================================================== | 80% ~2 s remaining
|======================================================================================================= | 81% ~2 s remaining
|======================================================================================================== | 82% ~2 s remaining
|========================================================================================================= | 82% ~2 s remaining
|========================================================================================================= | 82% ~2 s remaining
|========================================================================================================= | 82% ~2 s remaining
|========================================================================================================= | 83% ~2 s remaining
|========================================================================================================== | 83% ~2 s remaining
|=========================================================================================================== | 84% ~2 s remaining
|============================================================================================================ | 85% ~2 s remaining
|============================================================================================================= | 86% ~2 s remaining
|============================================================================================================== | 86% ~2 s remaining
|=============================================================================================================== | 87% ~2 s remaining
|================================================================================================================ | 88% ~1 s remaining
|================================================================================================================ | 88% ~1 s remaining
|================================================================================================================= | 89% ~1 s remaining
|================================================================================================================= | 89% ~1 s remaining
|================================================================================================================= | 89% ~1 s remaining
|================================================================================================================= | 89% ~1 s remaining
|================================================================================================================== | 89% ~1 s remaining
|=================================================================================================================== | 90% ~1 s remaining
|=================================================================================================================== | 90% ~1 s remaining
|=================================================================================================================== | 90% ~1 s remaining
|==================================================================================================================== | 91% ~1 s remaining
|==================================================================================================================== | 91% ~1 s remaining
|===================================================================================================================== | 92% ~1 s remaining
|===================================================================================================================== | 92% ~1 s remaining
|====================================================================================================================== | 92% ~1 s remaining
|====================================================================================================================== | 93% ~1 s remaining
|======================================================================================================================= | 94% ~1 s remaining
|======================================================================================================================== | 94% ~1 s remaining
|========================================================================================================================== | 95% ~1 s remaining
|=========================================================================================================================== | 96% ~0 s remaining
|============================================================================================================================ | 97% ~0 s remaining
|============================================================================================================================= | 98% ~0 s remaining
|============================================================================================================================== | 99% ~0 s remaining
|============================================================================================================================== | 99% ~0 s remaining
|=============================================================================================================================== | 99% ~0 s remaining
|=============================================================================================================================== |100% ~0 s remaining
ternary_vote$StateName<-sapply(ternary_vote$StateName, function(i) ifelse(i=='Utah','Utah','Not Utah'))
ggtern(ternary_vote, aes(x=clinton, y=trump, z=third_party, color=StateName))+
geom_point(aes(alpha = 0.2))
Not all states have partisan registration.
party_registration <- select(voterReg2016, State, DReg, RReg, OReg, GReg, LReg, NReg) %>%
melt(id= 'State') %>%
group_by(State, variable) %>%
dplyr::summarize(total_reg = sum(value)) %>%
complete(variable) %>%
ungroup() %>%
group_by(State) %>%
mutate(sum_total = sum(total_reg, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_reg),NA,paste0(round(total_reg / sum_total, 4)*100,'%')),
proportion_numeric = total_reg / sum_total) %>%
select(-sum_total)
party_registration$State<-sapply(party_registration$State, function(i) names(state_names)[which(state_names == i)])
non_partisan_states<-(filter(party_registration, variable == 'NReg') %>%
filter(proportion_numeric == 1))$State
party_registration <- filter(party_registration, !(State %in% non_partisan_states))
party_order <- 'DReg' # CHANGE ME
level_order<-(filter(party_registration,variable == party_order) %>%
select(State, proportion_numeric) %>%
arrange(desc(proportion_numeric)))$State
party_registration$State<- factor(party_registration$State, levels = level_order)
party_registration$variable<-factor(party_registration$variable, levels = c('NReg','OReg','GReg','LReg','RReg','DReg'))
ggplot(party_registration, aes(x=State, y=total_reg, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('DReg' = 'blue', 'RReg' = 'red', 'GReg' = 'green','LReg' = 'yellow','OReg' = 'purple','NReg' = 'grey'),
name = 'Candidate',
breaks = c('NReg','OReg','GReg','LReg','RReg','DReg'),
labels = c('No Party','Other Party','Green','Libertarian','Republican','Democratic')) +
labs(x = NULL, y = 'Percent of Registered Voters')+
scale_y_continuous(labels = scales::percent) +
ggtitle('Percent of Voter Registration by State', subtitle='2016') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Looks like we have a bit of an issue here with Other and Non-partisan registration. NE and WV are all “Other”.
Feel free to play with the variable in this.
state <- 'West Virginia'
party_registration_c <- filter(data2016, StateName == state) %>%
select(CountyName, DReg, RReg, OReg, GReg, LReg, NReg) %>%
melt(id= 'CountyName') %>%
group_by(CountyName, variable) %>%
dplyr::summarize(total_reg = sum(value)) %>%
complete(variable) %>%
ungroup() %>%
group_by(CountyName) %>%
mutate(sum_total = sum(total_reg, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_reg),NA,paste0(round(total_reg / sum_total, 4)*100,'%')),
proportion_numeric = total_reg / sum_total) %>%
select(-sum_total)
party_order <- 'DReg' # CHANGE ME
level_order<-(filter(party_registration_c,variable == party_order) %>%
select(CountyName, proportion_numeric) %>%
arrange(desc(proportion_numeric)))$CountyName
party_registration_c$CountyName<- factor(party_registration_c$CountyName, levels = level_order)
party_registration_c$variable<-factor(party_registration_c$variable, levels = c('NReg','OReg','GReg','LReg','RReg','DReg'))
ggplot(party_registration_c, aes(x=CountyName, y=total_reg, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('DReg' = 'blue', 'RReg' = 'red', 'GReg' = 'green','LReg' = 'yellow','OReg' = 'purple','NReg' = 'grey'),
name = 'Candidate',
breaks = c('NReg','OReg','GReg','LReg','RReg','DReg'),
labels = c('No Party','Other Party','Green','Libertarian','Republican','Democratic')) +
labs(x = NULL, y = 'Percent of Registered Voters')+
scale_y_continuous(labels = scales::percent) +
ggtitle('Percent of Voter Registration by County', subtitle=paste0(state,' - 2016')) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
turnout_data <- group_by(data2016, StateName) %>%
dplyr::summarize(total_votes = sum(clinton,trump,johnson,stein,other, na.rm=T),
total_reg_no_vote = sum(DReg,RReg,OReg,GReg,LReg,NReg, na.rm=T)-total_votes,
total_not_registered = ifelse(sum(TotalPopulation)- sum(Age0_4)- sum(Age5_9)- sum(Age10_14) - sum(Age15_19) -
total_reg_no_vote - total_votes<0,NA,sum(TotalPopulation)- sum(Age0_4)- sum(Age5_9)- sum(Age10_14) -
sum(Age15_19) - total_reg_no_vote - total_votes)) %>%
melt(id = 'StateName') %>%
group_by(StateName, variable) %>%
dplyr::summarize(total_pop = sum(value)) %>%
complete(variable) %>%
ungroup() %>%
group_by(StateName) %>%
mutate(sum_total = sum(total_pop, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_pop),NA,paste0(round(total_pop / sum_total, 4)*100,'%')),
proportion_numeric = total_pop / sum_total) %>%
select(-sum_total)
turnout_data$variable<-factor(turnout_data$variable, levels = c('total_not_registered','total_reg_no_vote','total_votes'))
ggplot(turnout_data, aes(x=StateName, y=total_pop, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('total_votes' = 'green', 'total_reg_no_vote' = 'purple', 'total_not_registered' = 'grey'),
name = 'Population',
breaks = c('total_votes','total_reg_no_vote','total_not_registered'),
labels = c('Voted','Registered, but did not vote','Not Registered')) +
labs(x = NULL, y = 'Percent of Population')+
scale_y_continuous(labels = scales::percent) +
ggtitle('Voter Turnout by State', subtitle='2016') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
state <- 'Kentucky'
turnout_data_c <- filter(data2016, StateName == state) %>%
group_by(CountyName) %>%
dplyr::summarize(total_votes = sum(clinton,trump,johnson,stein,other, na.rm=T),
total_reg_no_vote = sum(DReg,RReg,OReg,GReg,LReg,NReg, na.rm=T)-total_votes,
total_not_registered = ifelse(sum(TotalPopulation)- sum(Age0_4)- sum(Age5_9)- sum(Age10_14) - sum(Age15_19) -
total_reg_no_vote - total_votes<0,NA,sum(TotalPopulation)- sum(Age0_4)- sum(Age5_9)- sum(Age10_14) -
sum(Age15_19) - total_reg_no_vote - total_votes)) %>%
melt(id = 'CountyName') %>%
group_by(CountyName, variable) %>%
dplyr::summarize(total_pop = sum(value)) %>%
complete(variable) %>%
ungroup() %>%
group_by(CountyName) %>%
mutate(sum_total = sum(total_pop, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_pop),NA,paste0(round(total_pop / sum_total, 4)*100,'%')),
proportion_numeric = total_pop / sum_total) %>%
select(-sum_total)
turnout_data_c$variable<-factor(turnout_data_c$variable, levels = c('total_not_registered','total_reg_no_vote','total_votes'))
ggplot(turnout_data_c, aes(x=CountyName, y=total_pop, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('total_votes' = 'green', 'total_reg_no_vote' = 'purple', 'total_not_registered' = 'grey'),
name = 'Population',
breaks = c('total_votes','total_reg_no_vote','total_not_registered'),
labels = c('Voted','Registered, but did not vote','Not Registered')) +
labs(x = NULL, y = 'Percent of Population')+
scale_y_continuous(labels = scales::percent) +
ggtitle('Voter Turnout by County', subtitle=paste0(state,', 2016')) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Demo_Factors <- select(data2016, County, rDRPct, NCHS_UrbanRural2013, TotalPopulation, MedianAge, MedianHouseholdIncome, SimpsonDiversityIndex, propWhite, propBlack, propMfg2015, propHS, propMoreHS, propUnemp, propElders, propNMarried)
lm(rDRPct~.,select(Demo_Factors,-County)) %>% summary()
Call:
lm(formula = rDRPct ~ ., data = select(Demo_Factors, -County))
Residuals:
Min 1Q Median 3Q Max
-0.60295 -0.04971 0.00257 0.05506 0.34930
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.032e+00 5.994e-02 17.218 < 2e-16 ***
NCHS_UrbanRural2013Large fringe metro 2.919e-02 1.512e-02 1.931 0.053553 .
NCHS_UrbanRural2013Medium metro 1.546e-02 1.505e-02 1.027 0.304418
NCHS_UrbanRural2013Micropolitan (nonmetropolitan) 3.160e-02 1.549e-02 2.040 0.041463 *
NCHS_UrbanRural2013Noncore (nonmetropolitan) 5.146e-02 1.566e-02 3.286 0.001031 **
NCHS_UrbanRural2013Small metro 3.236e-02 1.558e-02 2.077 0.037899 *
TotalPopulation -4.223e-08 6.863e-09 -6.154 8.68e-10 ***
MedianAge -1.429e-02 9.947e-04 -14.370 < 2e-16 ***
MedianHouseholdIncome -6.213e-07 2.608e-07 -2.382 0.017284 *
SimpsonDiversityIndex 2.227e-01 2.577e-02 8.641 < 2e-16 ***
propWhite 5.219e-01 3.796e-02 13.748 < 2e-16 ***
propBlack 9.281e-02 2.814e-02 3.298 0.000987 ***
propMfg2015 -2.879e-04 5.838e-04 -0.493 0.621911
propHS 3.440e-01 5.481e-02 6.275 4.04e-10 ***
propMoreHS -2.194e-01 4.255e-02 -5.156 2.70e-07 ***
propUnemp 3.636e-05 5.378e-04 0.068 0.946102
propElders 5.882e-01 1.140e-01 5.159 2.66e-07 ***
propNMarried -1.490e+00 4.423e-02 -33.682 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0898 on 2736 degrees of freedom
(387 observations deleted due to missingness)
Multiple R-squared: 0.6721, Adjusted R-squared: 0.6701
F-statistic: 329.9 on 17 and 2736 DF, p-value: < 2.2e-16
Demo_Factors <- select(data2016, County, trump, clinton, johnson, stein, other, NCHS_UrbanRural2013, TotalPopulation, MedianAge, MedianHouseholdIncome, SimpsonDiversityIndex, propWhite, propBlack, propMfg2015, propHS, propMoreHS, propUnemp, propElders, propNMarried)
vote_by_county_f<-select(Demo_Factors, County, clinton, trump, johnson, stein, other) %>%
melt(id= c('County')) %>%
group_by(County, variable) %>%
dplyr::summarize(total_vote = sum(value)) %>% # Dunno how plyr got in, but whatever.
complete(variable) %>%
ungroup() %>%
group_by(County) %>%
mutate(sum_total = sum(total_vote, na.rm=T)) %>%
mutate(proportion = ifelse(is.na(total_vote),NA,paste0(round(total_vote / sum_total, 4)*100,'%')),
proportion_numeric = total_vote / sum_total) %>%
select(-sum_total)
|========================= | 20% ~8 s remaining
|========================== | 20% ~8 s remaining
|========================== | 21% ~8 s remaining
|=========================== | 22% ~8 s remaining
|============================ | 22% ~8 s remaining
|============================= | 23% ~8 s remaining
|============================== | 23% ~8 s remaining
|============================== | 24% ~8 s remaining
|============================== | 24% ~8 s remaining
|============================== | 24% ~8 s remaining
|================================ | 25% ~8 s remaining
|================================ | 26% ~8 s remaining
|================================= | 26% ~8 s remaining
|================================= | 26% ~8 s remaining
|================================== | 27% ~8 s remaining
|=================================== | 28% ~8 s remaining
|==================================== | 29% ~8 s remaining
|===================================== | 29% ~8 s remaining
|===================================== | 29% ~8 s remaining
|====================================== | 30% ~8 s remaining
|====================================== | 30% ~8 s remaining
|======================================= | 31% ~8 s remaining
|======================================= | 31% ~8 s remaining
|======================================== | 32% ~7 s remaining
|========================================= | 33% ~7 s remaining
|========================================== | 33% ~7 s remaining
|========================================== | 34% ~7 s remaining
|=========================================== | 34% ~7 s remaining
|============================================ | 35% ~7 s remaining
|============================================ | 35% ~7 s remaining
|============================================ | 35% ~8 s remaining
|============================================= | 35% ~8 s remaining
|============================================= | 35% ~8 s remaining
|============================================= | 36% ~8 s remaining
|============================================== | 36% ~8 s remaining
|============================================== | 37% ~8 s remaining
|=============================================== | 37% ~8 s remaining
|================================================ | 38% ~8 s remaining
|================================================ | 38% ~8 s remaining
|================================================ | 38% ~8 s remaining
|================================================= | 38% ~8 s remaining
|================================================= | 38% ~8 s remaining
|================================================= | 39% ~8 s remaining
|================================================= | 39% ~8 s remaining
|================================================== | 39% ~8 s remaining
|=================================================== | 40% ~8 s remaining
|==================================================== | 41% ~7 s remaining
|==================================================== | 41% ~7 s remaining
|==================================================== | 41% ~7 s remaining
|===================================================== | 42% ~7 s remaining
|===================================================== | 42% ~7 s remaining
|===================================================== | 42% ~8 s remaining
|===================================================== | 42% ~8 s remaining
|===================================================== | 42% ~8 s remaining
|====================================================== | 42% ~8 s remaining
|====================================================== | 43% ~8 s remaining
|======================================================= | 43% ~8 s remaining
|======================================================= | 43% ~8 s remaining
|======================================================= | 43% ~8 s remaining
|======================================================= | 44% ~8 s remaining
|======================================================== | 44% ~8 s remaining
|======================================================== | 45% ~8 s remaining
|========================================================= | 45% ~7 s remaining
|========================================================== | 46% ~7 s remaining
|========================================================== | 46% ~8 s remaining
|========================================================== | 46% ~8 s remaining
|=========================================================== | 46% ~7 s remaining
|=========================================================== | 47% ~7 s remaining
|============================================================ | 47% ~7 s remaining
|============================================================= | 48% ~7 s remaining
|============================================================= | 48% ~7 s remaining
|============================================================== | 49% ~7 s remaining
|=============================================================== | 49% ~7 s remaining
|=============================================================== | 50% ~7 s remaining
|================================================================ | 50% ~7 s remaining
|================================================================ | 51% ~7 s remaining
|================================================================= | 51% ~7 s remaining
|================================================================= | 51% ~7 s remaining
|================================================================== | 52% ~7 s remaining
|=================================================================== | 52% ~7 s remaining
|=================================================================== | 53% ~6 s remaining
|==================================================================== | 53% ~6 s remaining
|==================================================================== | 54% ~6 s remaining
|===================================================================== | 54% ~6 s remaining
|====================================================================== | 55% ~6 s remaining
|====================================================================== | 55% ~6 s remaining
|======================================================================= | 56% ~6 s remaining
|======================================================================= | 56% ~6 s remaining
|======================================================================= | 56% ~6 s remaining
|======================================================================== | 57% ~6 s remaining
|======================================================================== | 57% ~6 s remaining
|========================================================================= | 58% ~6 s remaining
|========================================================================= | 58% ~6 s remaining
|========================================================================== | 58% ~6 s remaining
|========================================================================== | 59% ~6 s remaining
|=========================================================================== | 59% ~6 s remaining
|============================================================================ | 60% ~6 s remaining
|============================================================================= | 60% ~5 s remaining
|============================================================================== | 61% ~5 s remaining
|============================================================================== | 61% ~5 s remaining
|============================================================================== | 62% ~5 s remaining
|=============================================================================== | 62% ~5 s remaining
|================================================================================ | 63% ~5 s remaining
|================================================================================= | 63% ~5 s remaining
|================================================================================== | 64% ~5 s remaining
|================================================================================== | 65% ~5 s remaining
|=================================================================================== | 65% ~5 s remaining
|=================================================================================== | 66% ~5 s remaining
|==================================================================================== | 66% ~5 s remaining
|===================================================================================== | 67% ~5 s remaining
|===================================================================================== | 67% ~5 s remaining
|===================================================================================== | 67% ~5 s remaining
|====================================================================================== | 68% ~4 s remaining
|======================================================================================= | 68% ~4 s remaining
|======================================================================================= | 69% ~4 s remaining
|======================================================================================= | 69% ~4 s remaining
|======================================================================================== | 69% ~4 s remaining
|========================================================================================= | 70% ~4 s remaining
|=========================================================================================== | 71% ~4 s remaining
|============================================================================================ | 72% ~4 s remaining
|============================================================================================= | 73% ~4 s remaining
|=============================================================================================== | 74% ~3 s remaining
|=============================================================================================== | 75% ~3 s remaining
|================================================================================================ | 76% ~3 s remaining
|================================================================================================== | 77% ~3 s remaining
|=================================================================================================== | 77% ~3 s remaining
|=================================================================================================== | 78% ~3 s remaining
|=================================================================================================== | 78% ~3 s remaining
|==================================================================================================== | 78% ~3 s remaining
|===================================================================================================== | 79% ~3 s remaining
|===================================================================================================== | 79% ~3 s remaining
|====================================================================================================== | 80% ~3 s remaining
|======================================================================================================= | 81% ~2 s remaining
|======================================================================================================== | 82% ~2 s remaining
|========================================================================================================== | 83% ~2 s remaining
|=========================================================================================================== | 84% ~2 s remaining
|=========================================================================================================== | 84% ~2 s remaining
|============================================================================================================ | 85% ~2 s remaining
|============================================================================================================= | 85% ~2 s remaining
|============================================================================================================== | 86% ~2 s remaining
|=============================================================================================================== | 87% ~2 s remaining
|================================================================================================================ | 88% ~2 s remaining
|================================================================================================================ | 88% ~2 s remaining
|================================================================================================================ | 88% ~2 s remaining
|================================================================================================================= | 88% ~2 s remaining
|================================================================================================================= | 89% ~1 s remaining
|================================================================================================================== | 89% ~1 s remaining
|=================================================================================================================== | 90% ~1 s remaining
|==================================================================================================================== | 91% ~1 s remaining
|===================================================================================================================== | 92% ~1 s remaining
|===================================================================================================================== | 92% ~1 s remaining
|===================================================================================================================== | 92% ~1 s remaining
|====================================================================================================================== | 92% ~1 s remaining
|====================================================================================================================== | 93% ~1 s remaining
|======================================================================================================================= | 93% ~1 s remaining
|======================================================================================================================== | 94% ~1 s remaining
|======================================================================================================================== | 94% ~1 s remaining
|========================================================================================================================= | 95% ~1 s remaining
|========================================================================================================================== | 96% ~1 s remaining
|========================================================================================================================== | 96% ~1 s remaining
|========================================================================================================================== | 96% ~1 s remaining
|=========================================================================================================================== | 96% ~0 s remaining
|============================================================================================================================ | 97% ~0 s remaining
|============================================================================================================================ | 97% ~0 s remaining
|============================================================================================================================ | 98% ~0 s remaining
|============================================================================================================================== | 98% ~0 s remaining
|============================================================================================================================== | 99% ~0 s remaining
|=============================================================================================================================== | 99% ~0 s remaining
vote_by_county_f <- left_join(vote_by_county_f, Demo_Factors, by='County')
candidate_order <- 'clinton' # CHANGE ME
level_order<-(filter(vote_by_county_f,variable == candidate_order) %>%
select(County, proportion_numeric) %>%
arrange(desc(proportion_numeric)))$County
vote_by_county_f$County<- factor(vote_by_county_f$County, levels = level_order)
vote_by_county_f$variable<-factor(vote_by_county_f$variable, levels = c('other','stein','johnson','trump','clinton'))
# ggplot(filter(vote_by_county_f, !is.na(NCHS_UrbanRural2013)), aes(x=County, y=total_vote, fill=variable)) +
# geom_bar(stat='identity', position = 'fill') +
# scale_fill_manual(values = c('clinton' = 'blue', 'trump' = 'red', 'stein' = 'green','johnson' = 'yellow','other' = 'purple'),
# name = 'Candidate',
# breaks = c('other','stein','johnson','trump','clinton'),
# labels = c('Others','Jill Stein','Gary Johnson','Donald J Trump','Hillary Clinton')) +
# labs(x = NULL, y = 'Percent of Vote')+
# scale_y_continuous(labels = scales::percent) +
# ggtitle('Percent of Vote by County', subtitle='2016 USA Presidential Election') +
# theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
# facet_wrap(~NCHS_UrbanRural2013, scales = 'free_x')
facetting_variable <- 'MedianAge' #Change Me
vote_by_county_f$fct <- cut(vote_by_county_f[[facetting_variable]],
breaks = c(0,quantile(vote_by_county_f[[facetting_variable]],
probs = c(0.1, 0.25, 0.5, 0.75, 0.9),
na.rm = T),Inf),
ordered_result = T,
labels = c('Lowest 10%','10th - 25th %ile',
'25th %ile - Median','Median - 75th %ile',
'75th - 90th %ile', 'Top 10%'))
ggplot(filter(vote_by_county_f, !is.na(fct)), aes(x=County, y=total_vote, fill=variable)) +
geom_bar(stat='identity', position = 'fill') +
scale_fill_manual(values = c('clinton' = 'blue', 'trump' = 'red', 'stein' = 'green','johnson' = 'yellow','other' = 'purple'),
name = 'Candidate',
breaks = c('other','stein','johnson','trump','clinton'),
labels = c('Others','Jill Stein','Gary Johnson','Donald J Trump','Hillary Clinton')) +
labs(x = NULL, y = 'Percent of Vote')+
scale_y_continuous(labels = scales::percent) +
ggtitle(paste0('Percent of Vote by County ~ ',facetting_variable), subtitle='2016 USA Presidential Election') +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~fct, scales = 'free_x')
states_by_region <- read_csv('./states_by_region.csv')
data2016 <- left_join(data2016, select(states_by_region, -`State Code`), by = c('StateName'='State'))
Demo_Factors <- mutate(data2016, MfgEmpChange1980_2015 = (MfgEmp2015/TotalEmp2015) - (MfgEmp1980/TotalEmp1980)) %>%
select(County, rDRPct, Region, MfgEmpChange1980_2015, NCHS_UrbanRural2013, TotalPopulation, MedianAge, MedianHouseholdIncome, SimpsonDiversityIndex, propWhite, propBlack, propMfg2015, propHS, propMoreHS, propUnemp, propElders, propNMarried)
Demo_Factors$Region<- as.factor(Demo_Factors$Region)
Demo_Factors_m <- melt(select(Demo_Factors, -NCHS_UrbanRural2013), id = c('County','rDRPct'))
r_squareds <- data_frame(variable = unique(Demo_Factors_m$variable))
r_squareds$r_square <- sapply(r_squareds$variable, function(i){
percent_format()(summary(lm(unlist(Demo_Factors[as.character(i)])~Demo_Factors$rDRPct))$r.squared)
})
Demo_Factors_m <- left_join(Demo_Factors_m, r_squareds, by='variable')
ggplot(Demo_Factors_m, aes(x=value, y=rDRPct)) +
geom_point() +
geom_smooth(method = 'lm') +
facet_wrap(~variable, scales = 'free_x')
# pulled from https://github.com/cphalpert/census-regions/blob/master/us%20census%20bureau%20regions%20and%20divisions.csv
states_by_region <- read_csv('./states_by_region.csv')
data2016 <- left_join(data2016, select(states_by_region, -`State Code`), by = c('StateName'='State'))
Demo_Factors <- mutate(data2016, MfgEmpChange1980_2015 = (MfgEmp2015/TotalEmp2015) - (MfgEmp1980/TotalEmp1980),
region_division = paste(Region,'-',Division)) %>%
select(County, rDRPct, Region, region_division, MfgEmpChange1980_2015, NCHS_UrbanRural2013, TotalPopulation, MedianAge,
MedianHouseholdIncome, SimpsonDiversityIndex, propWhite, propBlack, propMfg2015, propHS, propMoreHS, propUnemp,
propElders, propNMarried)
Demo_Factors$Region<- as.factor(Demo_Factors$Region)
Demo_Factors$region_division <- as.factor(Demo_Factors$region_division)
Demo_Factors_m2 <- melt(select(Demo_Factors, County, rDRPct, MfgEmpChange1980_2015, propWhite, region_division),
id = c('County','rDRPct','MfgEmpChange1980_2015'))
regional_mfg<-filter(Demo_Factors_m2, variable == 'region_division', !is.na(MfgEmpChange1980_2015)) %>% select(-variable)
ggplot(regional_mfg, aes(x=MfgEmpChange1980_2015, y=rDRPct))+
geom_point()+
geom_smooth(method = 'lm')+
facet_wrap(~value)
ggplot(regional_mfg, aes(x=value, y=MfgEmpChange1980_2015)) +
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Demo_Factors<-left_join(Demo_Factors, mutate(data2016, BlindDisabledSSI_p = BlindDisabledSSI / TotalPopulation)
%>% select(County, BlindDisabledSSI_p))
summary(lm(rDRPct ~ Region + propWhite + NCHS_UrbanRural2013 + propNMarried, Demo_Factors))
Call:
lm(formula = rDRPct ~ Region + propWhite + NCHS_UrbanRural2013 +
propNMarried, data = Demo_Factors)
Residuals:
Min 1Q Median 3Q Max
-0.59819 -0.05725 0.01011 0.06696 0.37174
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.440370 0.026992 16.315 < 2e-16 ***
RegionNortheast -0.066455 0.007889 -8.424 < 2e-16 ***
RegionSouth 0.063649 0.004628 13.752 < 2e-16 ***
RegionWest -0.024467 0.005854 -4.180 3.00e-05 ***
propWhite 0.373048 0.016642 22.416 < 2e-16 ***
NCHS_UrbanRural2013Large fringe metro 0.082674 0.013900 5.948 3.02e-09 ***
NCHS_UrbanRural2013Medium metro 0.096519 0.013858 6.965 3.99e-12 ***
NCHS_UrbanRural2013Micropolitan (nonmetropolitan) 0.141172 0.013474 10.477 < 2e-16 ***
NCHS_UrbanRural2013Noncore (nonmetropolitan) 0.160562 0.013363 12.016 < 2e-16 ***
NCHS_UrbanRural2013Small metro 0.129140 0.013907 9.286 < 2e-16 ***
propNMarried -0.906555 0.038321 -23.657 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1023 on 3128 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.6021, Adjusted R-squared: 0.6008
F-statistic: 473.3 on 10 and 3128 DF, p-value: < 2.2e-16
summary(lm(rDRPct ~ BlindDisabledSSI_p + propDiscouraged + MfgEmpChange1980_2015, Demo_Factors))
Call:
lm(formula = rDRPct ~ BlindDisabledSSI_p + propDiscouraged +
MfgEmpChange1980_2015, data = Demo_Factors)
Residuals:
Min 1Q Median 3Q Max
-0.59099 -0.08812 0.02904 0.11475 0.32123
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.661338 0.006841 96.679 < 2e-16 ***
BlindDisabledSSI_p -1.027191 0.253088 -4.059 5.09e-05 ***
propDiscouraged 0.241958 0.056277 4.299 1.78e-05 ***
MfgEmpChange1980_2015 0.147426 0.036760 4.010 6.25e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1537 on 2396 degrees of freedom
(741 observations deleted due to missingness)
Multiple R-squared: 0.01473, Adjusted R-squared: 0.0135
F-statistic: 11.94 on 3 and 2396 DF, p-value: 9.228e-08